STEP 1: GENERAL DEMOGRAPHICS

In [11]:
#import statements
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import linregress

#reading in data
df = pd.read_csv("loan_data.csv")
df
Out[11]:
credit.policy purpose int.rate installment log.annual.inc dti fico days.with.cr.line revol.bal revol.util inq.last.6mths delinq.2yrs pub.rec not.fully.paid
0 1 debt_consolidation 0.1189 829.10 11.350407 19.48 737 5639.958333 28854 52.1 0 0 0 0
1 1 credit_card 0.1071 228.22 11.082143 14.29 707 2760.000000 33623 76.7 0 0 0 0
2 1 debt_consolidation 0.1357 366.86 10.373491 11.63 682 4710.000000 3511 25.6 1 0 0 0
3 1 debt_consolidation 0.1008 162.34 11.350407 8.10 712 2699.958333 33667 73.2 1 0 0 0
4 1 credit_card 0.1426 102.92 11.299732 14.97 667 4066.000000 4740 39.5 0 1 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
9573 0 all_other 0.1461 344.76 12.180755 10.39 672 10474.000000 215372 82.1 2 0 0 1
9574 0 all_other 0.1253 257.70 11.141862 0.21 722 4380.000000 184 1.1 5 0 0 1
9575 0 debt_consolidation 0.1071 97.81 10.596635 13.09 687 3450.041667 10036 82.9 8 0 0 1
9576 0 home_improvement 0.1600 351.58 10.819778 19.18 692 1800.000000 0 3.2 5 0 0 1
9577 0 debt_consolidation 0.1392 853.43 11.264464 16.28 732 4740.000000 37879 57.0 6 0 0 1

9578 rows × 14 columns

In [36]:
#breakdown of data
df.describe()
Out[36]:
credit.policy int.rate installment log.annual.inc dti fico days.with.cr.line revol.bal revol.util inq.last.6mths delinq.2yrs pub.rec not.fully.paid
count 9578.000000 9578.000000 9578.000000 9578.000000 9578.000000 9578.000000 9578.000000 9.578000e+03 9578.000000 9578.000000 9578.000000 9578.000000 9578.000000
mean 0.804970 0.122640 319.089413 10.932117 12.606679 710.846314 4560.767197 1.691396e+04 46.799236 1.577469 0.163708 0.062122 0.160054
std 0.396245 0.026847 207.071301 0.614813 6.883970 37.970537 2496.930377 3.375619e+04 29.014417 2.200245 0.546215 0.262126 0.366676
min 0.000000 0.060000 15.670000 7.547502 0.000000 612.000000 178.958333 0.000000e+00 0.000000 0.000000 0.000000 0.000000 0.000000
25% 1.000000 0.103900 163.770000 10.558414 7.212500 682.000000 2820.000000 3.187000e+03 22.600000 0.000000 0.000000 0.000000 0.000000
50% 1.000000 0.122100 268.950000 10.928884 12.665000 707.000000 4139.958333 8.596000e+03 46.300000 1.000000 0.000000 0.000000 0.000000
75% 1.000000 0.140700 432.762500 11.291293 17.950000 737.000000 5730.000000 1.824950e+04 70.900000 2.000000 0.000000 0.000000 0.000000
max 1.000000 0.216400 940.140000 14.528354 29.960000 827.000000 17639.958330 1.207359e+06 119.000000 33.000000 13.000000 5.000000 1.000000
In [265]:
df['credit.policy'].value_counts().plot(kind='bar')
df.groupby('credit.policy').size()
Out[265]:
credit.policy
0    1868
1    7710
dtype: int64
In [264]:
df['purpose'].value_counts().plot(kind='pie')
df.groupby('purpose').size()
Out[264]:
purpose
all_other             2331
credit_card           1262
debt_consolidation    3957
educational            343
home_improvement       629
major_purchase         437
small_business         619
dtype: int64
In [52]:
df.groupby('inq.last.6mths').size()
Out[52]:
inq.last.6mths
0     3637
1     2462
2     1384
3      864
4      475
5      278
6      165
7      100
8       72
9       47
10      23
11      15
12      15
13       6
14       6
15       9
16       3
17       2
18       4
19       2
20       1
24       2
25       1
27       1
28       1
31       1
32       1
33       1
dtype: int64
In [51]:
df.groupby('delinq.2yrs').size()
Out[51]:
delinq.2yrs
0     8458
1      832
2      192
3       65
4       19
5        6
6        2
7        1
8        1
11       1
13       1
dtype: int64
In [268]:
df.groupby('pub.rec').size()
Out[268]:
pub.rec
0    9019
1     533
2      19
3       5
4       1
5       1
dtype: int64
In [266]:
df['not.fully.paid'].value_counts().plot(kind='bar')
df.groupby('not.fully.paid').size()
Out[266]:
not.fully.paid
0    8045
1    1533
dtype: int64

STEP 2: PRELIMINARY IDENTIFYING TRENDS

Interest rate on x-axis

In [162]:
#interest rate by installment
x = df['int.rate']
y = df['installment']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: 2129.874205712429
Intercept is: 57.88151167463093
In [42]:
#interest rate by annual income
plt.scatter(df['int.rate'], df['log.annual.inc'])
plt.show()
In [43]:
#interest rate by debt to income
plt.scatter(df['int.rate'], df['dti'])
plt.show()
In [163]:
#interest rate by fico
x = df['int.rate']
y = df['fico']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: -1010.9934681265688
Intercept is: 834.8346146228582
In [166]:
#interest rate by credit line
x = df['int.rate']
y = df['days.with.cr.line']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: -11534.803089964835
Intercept is: 5975.396145977571
In [46]:
#interest rate by revolving balance
plt.scatter(df['int.rate'], df['revol.bal'])
plt.show()
In [47]:
#interest rate by utilization rate
plt.scatter(df['int.rate'], df['revol.util'])
plt.show()
In [48]:
#interest rate by inquiry of last six months
plt.scatter(df['int.rate'], df['inq.last.6mths'])
plt.show()

Installment on x-axis

In [167]:
#installment by annual income
x = df['installment']
y = df['log.annual.inc']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: 0.001330454370076129
Intercept is: 10.507583233511852
In [56]:
#installment by debt to income
plt.scatter(df['installment'], df['dti'])
plt.show()
In [58]:
#installment by fico
plt.scatter(df['installment'], df['fico'])
plt.show()
In [59]:
#installment by credit line
plt.scatter(df['installment'], df['days.with.cr.line'])
plt.show()
In [60]:
#installment by revolving balance
plt.scatter(df['installment'], df['revol.bal'])
plt.show()
In [61]:
#installment by utilization rate
plt.scatter(df['installment'], df['revol.util'])
plt.show()
In [62]:
#installment by inquiry of last six months
plt.scatter(df['installment'], df['inq.last.6mths'])
plt.show()

Annual income on x-axis

In [64]:
#annual income by debt to income
plt.scatter(df['log.annual.inc'], df['dti'])
plt.show()
In [65]:
#annual income by fico
plt.scatter(df['log.annual.inc'], df['fico'])
plt.show()
In [66]:
#annual income by credit line
plt.scatter(df['log.annual.inc'], df['days.with.cr.line'])
plt.show()
In [201]:
#annual income by revolving balance
x = df['log.annual.inc']
y = df['revol.bal']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: 20432.261589714253
Intercept is: -206453.91321338527
In [106]:
#annual income by utilization rate
plt.scatter(df['log.annual.inc'], df['revol.util'])
plt.show()
In [107]:
#annual income by inquiry of last six months
plt.scatter(df['log.annual.inc'], df['inq.last.6mths'])
plt.show()

Debt to income on x-axis

In [108]:
#dti by fico
plt.scatter(df['dti'], df['fico'])
plt.show()
In [109]:
#dti by credit line
plt.scatter(df['dti'], df['days.with.cr.line'])
plt.show()
In [110]:
#dti by revolving balance
plt.scatter(df['dti'], df['revol.bal'])
plt.show()
In [111]:
#dti by utilization rate
plt.scatter(df['dti'], df['revol.util'])
plt.show()
In [112]:
#dti by inquiry of last six months
plt.scatter(df['dti'], df['inq.last.6mths'])
plt.show()

FICO on x-axis

In [168]:
#fico by credit line
x = df['fico']
y = df['days.with.cr.line']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: 17.352647822578994
Intercept is: -7774.298554458422
In [114]:
#fico by revolving balance
plt.scatter(df['fico'], df['revol.bal'])
plt.show()
In [169]:
#fico by utilization rate
x = df['fico']
y = df['revol.util']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: -0.4136152820591314
Intercept is: 340.8161346090674
In [170]:
#fico by inquiry of last six months
x = df['fico']
y = df['inq.last.6mths']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: -0.010737009743086594
Intercept is: 9.209833004559268

Credit line on x-axis

In [118]:
#credit line by revolving balance
plt.scatter(df['days.with.cr.line'], df['revol.bal'])
plt.show()
In [119]:
#credit line by utilization rate
plt.scatter(df['days.with.cr.line'], df['revol.util'])
plt.show()
In [212]:
#fico by inquiry of last six months
x = df['days.with.cr.line']
y = df['inq.last.6mths']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: -3.677730508657615e-05
Intercept is: 1.7452019268661776

Revolving balance on x-axis

In [122]:
#revolving balance by utilization rate
plt.scatter(df['revol.bal'], df['revol.util'])
plt.show()
In [214]:
#revolving balance by inquiry of last six months
plt.scatter(df['revol.bal'], df['inq.last.6mths'])
plt.show()

Utilization rate on x-axis

In [124]:
#utilization rate by inquiry of last six months
plt.scatter(df['revol.util'], df['inq.last.6mths'])
plt.show()

STEP 3: CATEGORICAL BREAKDOWN

By credit policy

In [150]:
#by credit policy
hasCreditPolicy =  df['credit.policy']==1
df_hasCreditPolicy = df[hasCreditPolicy]

noCreditPolicy = df['credit.policy']==0
df_noCreditPolicy = df[noCreditPolicy]

df_hasCreditPolicy = df_hasCreditPolicy.reset_index()
df_noCreditPolicy = df_noCreditPolicy.reset_index()
In [154]:
#have credit policy metrics
df_hasCreditPolicy.describe()
Out[154]:
int.rate credit.policy installment log.annual.inc dti fico days.with.cr.line revol.bal revol.util inq.last.6mths delinq.2yrs pub.rec not.fully.paid trendline
count 7710.000000 7710.0 7710.000000 7710.000000 7710.000000 7710.000000 7710.000000 7710.000000 7710.000000 7710.000000 7710.000000 7710.000000 7710.000000 7710.000000
mean 0.118754 1.0 325.079200 10.942680 12.298684 717.356031 4682.468461 13798.404280 45.312677 0.997536 0.143191 0.055123 0.131518 310.812527
std 0.025571 0.0 205.611447 0.585371 6.627485 36.630697 2429.932117 16878.560424 28.821751 1.152580 0.469033 0.241491 0.337987 54.462119
min 0.060000 1.0 15.690000 8.294050 0.000000 627.000000 1110.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 185.673964
25% 0.097600 1.0 166.500000 10.596535 7.130000 687.000000 2970.010417 3334.250000 21.000000 0.000000 0.000000 0.000000 0.000000 265.757234
50% 0.118900 1.0 278.605000 10.933107 12.380000 712.000000 4230.041667 8707.500000 44.300000 1.000000 0.000000 0.000000 0.000000 311.123555
75% 0.135700 1.0 447.747500 11.289819 17.520000 742.000000 5789.958333 17579.750000 68.675000 2.000000 0.000000 0.000000 0.000000 346.905441
max 0.212100 1.0 918.020000 14.528354 29.420000 827.000000 17616.000000 149527.000000 99.900000 8.000000 6.000000 4.000000 1.000000 509.627831
In [153]:
#no credit policy metrics
df_noCreditPolicy.describe()
Out[153]:
int.rate credit.policy installment log.annual.inc dti fico days.with.cr.line revol.bal revol.util inq.last.6mths delinq.2yrs pub.rec not.fully.paid trendline
count 1868.000000 1868.0 1868.000000 1868.000000 1868.000000 1868.000000 1868.000000 1.868000e+03 1868.000000 1868.000000 1868.000000 1868.000000 1868.000000 1868.000000
mean 0.138680 0.0 294.367115 10.888520 13.877901 683.978051 4058.456304 2.977315e+04 52.934872 3.971092 0.248394 0.091006 0.277837 353.251509
std 0.026032 0.0 211.256726 0.722315 7.727552 30.950970 2699.510897 6.680757e+04 29.008721 3.496608 0.783037 0.332597 0.448053 55.443886
min 0.060000 0.0 15.670000 7.547502 0.000000 612.000000 178.958333 0.000000e+00 0.000000 0.000000 0.000000 0.000000 0.000000 185.673964
25% 0.122200 0.0 138.867500 10.463103 7.757500 657.000000 2009.729167 2.469000e+03 29.775000 1.000000 0.000000 0.000000 0.000000 318.152140
50% 0.138700 0.0 240.610000 10.896739 14.085000 682.000000 3690.000000 8.039500e+03 54.650000 4.000000 0.000000 0.000000 0.000000 353.295064
75% 0.154500 0.0 392.840000 11.314475 19.945000 702.000000 5490.000000 2.327450e+04 77.800000 5.000000 0.000000 0.000000 1.000000 386.947076
max 0.216400 0.0 940.140000 13.458836 29.960000 807.000000 17639.958330 1.207359e+06 119.000000 33.000000 13.000000 5.000000 1.000000 518.786290
In [229]:
#interest rate by installment
#has credit policy
x = df_hasCreditPolicy['int.rate']
y = df_hasCreditPolicy['installment']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#no credit policy
x = df_noCreditPolicy['int.rate']
y = df_noCreditPolicy['installment']

df_noCreditPolicy.plot(x='int.rate', y='installment', style='rx')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color="blue")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: 2582.2316154625228
Intercept is: 18.428946858710844
Slope is: 2060.0000540663855
Intercept is: 8.687233401455615
In [175]:
#interest rate by annual income
df_hasCreditPolicy.plot(x='int.rate', y='log.annual.inc', style='o')
df_noCreditPolicy.plot(x='int.rate', y='log.annual.inc', style='rx')
Out[175]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1ef3a790>
In [230]:
#interest rate by debt to income
#has credit policy
df_hasCreditPolicy.plot(x='int.rate', y='dti', style='o')

#no credit policy
#no credit policy
x = df_noCreditPolicy['int.rate']
y = df_noCreditPolicy['dti']

df_noCreditPolicy.plot(x='int.rate', y='dti', style='rx')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color="blue")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: 87.16052869657973
Intercept is: 1.7905185735296847
In [232]:
#interest rate by fico
#has credit policy
x = df_hasCreditPolicy['int.rate']
y = df_hasCreditPolicy['fico']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#no credit policy
x = df_noCreditPolicy['int.rate']
y = df_noCreditPolicy['fico']

df_noCreditPolicy.plot(x='int.rate', y='fico', style='rx')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color="blue")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: -1032.8049170004344
Intercept is: 840.0057140923049
Slope is: -610.6974650299903
Intercept is: 768.6693012245183
In [233]:
#interest rate by days w/ credit
#has credit policy
x = df_hasCreditPolicy['int.rate']
y = df_hasCreditPolicy['days.with.cr.line']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#no credit policy
df_noCreditPolicy.plot(x='int.rate', y='days.with.cr.line', style='rx')
Slope is: -13852.895528742576
Intercept is: 6327.554785278022
Out[233]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a21545750>
In [235]:
#interest rate by revolving balance
df_hasCreditPolicy.plot(x='int.rate', y='revol.bal', style='o')
df_noCreditPolicy.plot(x='int.rate', y='revol.bal', style='rx')
Out[235]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a21bd8790>
In [236]:
#interest rate by utilization rate
#has credit policy
x = df_hasCreditPolicy['int.rate']
y = df_hasCreditPolicy['revol.util']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#no credit policy
x = df_noCreditPolicy['int.rate']
y = df_noCreditPolicy['revol.util']

df_noCreditPolicy.plot(x='int.rate', y='revol.util', style='rx')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color="blue")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: 553.2425565721926
Intercept is: -20.387072298814417
Slope is: 356.20713164453036
Intercept is: 3.5362266826749575
In [196]:
#interest rate by inquisition by last 6 months
df_hasCreditPolicy.plot(x='int.rate', y='inq.last.6mths', style='o')
df_noCreditPolicy.plot(x='int.rate', y='inq.last.6mths', style='rx')
Out[196]:
<matplotlib.axes._subplots.AxesSubplot at 0x11a7a0a90>
In [237]:
#installment by annual income
#has credit policy
x = df_hasCreditPolicy['installment']
y = df_hasCreditPolicy['log.annual.inc']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#no credit policy
x = df_noCreditPolicy['installment']
y = df_noCreditPolicy['log.annual.inc']

df_noCreditPolicy.plot(x='installment', y='log.annual.inc', style='rx')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color="blue")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: 0.0012096000190208833
Intercept is: 10.549464195105813
Slope is: 0.0017957876913329346
Intercept is: 10.359899038950962
In [187]:
#installment by debt to income
df_hasCreditPolicy.plot(x='installment', y='dti', style='o')
df_noCreditPolicy.plot(x='installment', y='dti', style='rx')
Out[187]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1dc22790>
In [188]:
#installment by fico
df_hasCreditPolicy.plot(x='installment', y='fico', style='o')
df_noCreditPolicy.plot(x='installment', y='fico', style='rx')
Out[188]:
<matplotlib.axes._subplots.AxesSubplot at 0x11818ed90>
In [189]:
#installment by days w/ credit
df_hasCreditPolicy.plot(x='installment', y='days.with.cr.line', style='o')
df_noCreditPolicy.plot(x='installment', y='days.with.cr.line', style='rx')
Out[189]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1ddce450>
In [191]:
#installment by revolving balance
df_hasCreditPolicy.plot(x='installment', y='revol.bal', style='o')
df_noCreditPolicy.plot(x='installment', y='revol.bal', style='rx')
Out[191]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1e3f9b90>
In [193]:
#installment by utilization rate
df_hasCreditPolicy.plot(x='installment', y='revol.util', style='o')
df_noCreditPolicy.plot(x='installment', y='revol.util', style='rx')
Out[193]:
<matplotlib.axes._subplots.AxesSubplot at 0x119dbc610>
In [194]:
#installment by inquisitions in last 6 months
df_hasCreditPolicy.plot(x='installment', y='inq.last.6mths', style='o')
df_noCreditPolicy.plot(x='installment', y='inq.last.6mths', style='rx')
Out[194]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1dc3b850>
In [195]:
#annual income by debt to income
df_hasCreditPolicy.plot(x='log.annual.inc', y='dti', style='o')
df_noCreditPolicy.plot(x='log.annual.inc', y='dti', style='rx')
Out[195]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1e12bdd0>
In [197]:
#annual income by fico
df_hasCreditPolicy.plot(x='log.annual.inc', y='fico', style='o')
df_noCreditPolicy.plot(x='log.annual.inc', y='fico', style='rx')
Out[197]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1da991d0>
In [238]:
#annual income by days w/ credit
#has credit policy
x = df_hasCreditPolicy['log.annual.inc']
y = df_hasCreditPolicy['days.with.cr.line']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#no credit policy
x = df_noCreditPolicy['log.annual.inc']
y = df_noCreditPolicy['days.with.cr.line']

df_noCreditPolicy.plot(x='log.annual.inc', y='days.with.cr.line', style='rx')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color="blue")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: 1220.0089386675131
Intercept is: -8667.698953682275
Slope is: 1724.2136119322233
Intercept is: -14715.677887379152
In [239]:
#annual income by revolving balance
#has credit policy
x = df_hasCreditPolicy['log.annual.inc']
y = df_hasCreditPolicy['revol.bal']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#no credit policy
x = df_noCreditPolicy['log.annual.inc']
y = df_noCreditPolicy['revol.bal']

df_noCreditPolicy.plot(x='log.annual.inc', y='revol.bal', style='rx')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color="blue")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: 11902.15867541023
Intercept is: -116443.1094294996
Slope is: 44992.5263962133
Intercept is: -460128.86554757814
In [200]:
#annual income by utilization rate
df_hasCreditPolicy.plot(x='log.annual.inc', y='revol.util', style='o')
df_noCreditPolicy.plot(x='log.annual.inc', y='revol.util', style='rx')
Out[200]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1d885c50>
In [202]:
#annual income by inquisitions in last 6 months
df_hasCreditPolicy.plot(x='log.annual.inc', y='inq.last.6mths', style='o')
df_noCreditPolicy.plot(x='log.annual.inc', y='inq.last.6mths', style='rx')
Out[202]:
<matplotlib.axes._subplots.AxesSubplot at 0x11a375490>
In [203]:
#debt to income by fico
df_hasCreditPolicy.plot(x='dti', y='fico', style='o')
df_noCreditPolicy.plot(x='dti', y='fico', style='rx')
Out[203]:
<matplotlib.axes._subplots.AxesSubplot at 0x1187544d0>
In [204]:
#debt to income by days w/ credit
df_hasCreditPolicy.plot(x='dti', y='days.with.cr.line', style='o')
df_noCreditPolicy.plot(x='dti', y='days.with.cr.line', style='rx')
Out[204]:
<matplotlib.axes._subplots.AxesSubplot at 0x118757850>
In [241]:
#debt to income by revolving balance
#has credit policy
x = df_hasCreditPolicy['dti']
y = df_hasCreditPolicy['revol.bal']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#no credit policy
x = df_noCreditPolicy['dti']
y = df_noCreditPolicy['revol.bal']

df_noCreditPolicy.plot(x='dti', y='revol.bal', style='rx')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color="blue")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: 620.384816202651
Intercept is: 6168.487760173601
Slope is: 1543.2316091045973
Intercept is: 8356.336308405327
In [206]:
#debt to income by utilization rate
df_hasCreditPolicy.plot(x='dti', y='revol.util', style='o')
df_noCreditPolicy.plot(x='dti', y='revol.util', style='rx')
Out[206]:
<matplotlib.axes._subplots.AxesSubplot at 0x118954c90>
In [207]:
#debt to income by inquisitions in last 6 months
df_hasCreditPolicy.plot(x='dti', y='inq.last.6mths', style='o')
df_noCreditPolicy.plot(x='dti', y='inq.last.6mths', style='rx')
Out[207]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1cadca50>
In [243]:
#fico by days w/ credit
#has credit policy
x = df_hasCreditPolicy['fico']
y = df_hasCreditPolicy['days.with.cr.line']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#no credit policy
x = df_noCreditPolicy['fico']
y = df_noCreditPolicy['days.with.cr.line']

df_noCreditPolicy.plot(x='fico', y='days.with.cr.line', style='rx')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color="blue")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: 16.6200664733425
Intercept is: -7240.036461530974
Slope is: 20.331942821558282
Intercept is: -9848.146328566592
In [244]:
#fico by revolving balance
#has credit policy
x = df_hasCreditPolicy['fico']
y = df_hasCreditPolicy['revol.bal']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#no credit policy
x = df_noCreditPolicy['fico']
y = df_noCreditPolicy['revol.bal']

df_noCreditPolicy.plot(x='fico', y='revol.bal', style='rx')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color="blue")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: -34.48387270396213
Intercept is: 38535.61834100704
Slope is: 540.9857905094798
Intercept is: -340249.25425376743
In [245]:
#fico by utilization rate
#has credit policy
x = df_hasCreditPolicy['fico']
y = df_hasCreditPolicy['revol.util']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#no credit policy
x = df_noCreditPolicy['fico']
y = df_noCreditPolicy['revol.util']

df_noCreditPolicy.plot(x='fico', y='revol.util', style='rx')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color="blue")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: -0.45461647782065884
Intercept is: 371.4345492578038
Slope is: -0.35000414602373026
Intercept is: 292.3300252967267
In [395]:
#f#fico by inquisitions in last 6 months
df_hasCreditPolicy.plot(x='fico', y='inq.last.6mths', style='o')
df_noCreditPolicy.plot(x='fico', y='inq.last.6mths', style='rx')
Out[395]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a20092e10>
In [271]:
#purpose for w/ credit policy
df_hasCreditPolicy['purpose'].value_counts().plot(kind='pie')
df_hasCreditPolicy.groupby('purpose').size()
Out[271]:
purpose
all_other             1835
credit_card           1020
debt_consolidation    3223
educational            254
home_improvement       512
major_purchase         371
small_business         495
dtype: int64
In [272]:
#purpose for no credit policy
df_noCreditPolicy['purpose'].value_counts().plot(kind='pie')
df_noCreditPolicy.groupby('purpose').size()
Out[272]:
purpose
all_other             496
credit_card           242
debt_consolidation    734
educational            89
home_improvement      117
major_purchase         66
small_business        124
dtype: int64
In [324]:
#fully paid for w/ credit policy
df_hasCreditPolicy['not.fully.paid'].value_counts().plot(kind='pie')
print(df_hasCreditPolicy.groupby('not.fully.paid').size())
print()
print("Percent who have fully paid: " + str(6696/(6696+1014)))
not.fully.paid
0    6696
1    1014
dtype: int64

Percent who have fully paid: 0.8684824902723736
In [323]:
#fully paid for w/ no credit policy
df_noCreditPolicy['not.fully.paid'].value_counts().plot(kind='pie')
print(df_hasCreditPolicy.groupby('not.fully.paid').size())
print()
print("Percent who have fully paid: " + str(1349/(1349+519)))
not.fully.paid
0    6696
1    1014
dtype: int64

Percent who have fully paid: 0.7221627408993576

By purpose

In [251]:
#by credit policy
allOther = df['purpose']=='all_other'
df_allOther = df[allOther]

creditCard = df['purpose']=='credit_card'
df_creditCard = df[creditCard]

debtConsol = df['purpose']=='debt_consolidation'
df_debtConsol = df[debtConsol]

education = df['purpose']=='educational'
df_education = df[education]

homeImprov = df['purpose']=='home_improvement'
df_homeImprov = df[homeImprov]

majorPurch = df['purpose']=='major_purchase'
df_majorPurch = df[majorPurch]

smallBus = df['purpose']=='small_business'
df_smallBus = df[smallBus]

df_allOther = df_allOther.reset_index()
df_creditCard = df_creditCard.reset_index()
df_debtConsol = df_debtConsol.reset_index()
df_education = df_education.reset_index()
df_homeImprov = df_homeImprov.reset_index()
df_majorPurch = df_majorPurch.reset_index()
df_smallBus = df_smallBus.reset_index()
In [276]:
#ALL OTHER
df_allOther.describe()
Out[276]:
index credit.policy int.rate installment log.annual.inc dti fico days.with.cr.line revol.bal revol.util inq.last.6mths delinq.2yrs pub.rec not.fully.paid
count 2331.000000 2331.000000 2331.000000 2331.000000 2331.000000 2331.000000 2331.000000 2331.000000 2331.000000 2331.000000 2331.000000 2331.000000 2331.000000 2331.000000
mean 4902.716860 0.787216 0.116771 244.937542 10.845313 11.079498 715.344058 4311.706153 12883.039897 39.712321 1.646504 0.179751 0.048048 0.166023
std 2793.211695 0.409364 0.027152 184.272991 0.660981 7.099443 40.059548 2559.183077 31770.912348 29.347922 2.284179 0.588037 0.229404 0.372181
min 7.000000 0.000000 0.063900 15.690000 7.600902 0.000000 632.000000 178.958333 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 2574.000000 1.000000 0.093200 111.580000 10.463103 5.005000 682.000000 2489.979167 1439.500000 13.350000 0.000000 0.000000 0.000000 0.000000
50% 4999.000000 1.000000 0.118300 190.630000 10.839581 10.560000 712.000000 3869.958333 4689.000000 36.500000 1.000000 0.000000 0.000000 0.000000
75% 7365.000000 1.000000 0.135500 319.470000 11.230118 16.560000 742.000000 5520.000000 12679.500000 62.200000 2.000000 0.000000 0.000000 0.000000
max 9574.000000 1.000000 0.216400 916.950000 13.997832 29.900000 822.000000 17639.958330 602519.000000 108.800000 31.000000 11.000000 3.000000 1.000000
In [291]:
#credit policy for other
df_allOther['credit.policy'].value_counts().plot(kind='pie')

print(df_allOther.groupby('credit.policy').size())
print()
print("Percent who do not have a credit policy: " + str(496/(1835+496)))
credit.policy
0     496
1    1835
dtype: int64

Percent who do not have a credit policy: 0.21278421278421278
In [322]:
#fully paid for other
df_allOther['not.fully.paid'].value_counts().plot(kind='pie')

print(df_allOther.groupby('not.fully.paid').size())
print()
print("Percent who have fully paid: " + str(1944/(1944+387)))
not.fully.paid
0    1944
1     387
dtype: int64

Percent who have fully paid: 0.833976833976834
In [253]:
#CREDIT CARD
df_creditCard.describe()
Out[253]:
index credit.policy int.rate installment log.annual.inc dti fico days.with.cr.line revol.bal revol.util inq.last.6mths delinq.2yrs pub.rec not.fully.paid
count 1262.000000 1262.000000 1262.000000 1262.000000 1262.000000 1262.000000 1262.000000 1262.000000 1262.000000 1262.000000 1262.000000 1262.000000 1262.000000 1262.000000
mean 4497.358954 0.808241 0.119738 319.500713 11.047230 14.099398 709.626783 4857.005943 23180.020602 53.600475 1.387480 0.151347 0.072108 0.115689
std 2861.502040 0.393841 0.024906 198.228995 0.568385 6.474297 35.590267 2508.050807 33636.249864 27.149591 2.138266 0.508526 0.314137 0.319979
min 1.000000 0.000000 0.060000 16.730000 8.922658 0.000000 627.000000 183.041667 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 1829.500000 1.000000 0.100800 171.150000 10.710782 9.182500 682.000000 3089.958333 6432.000000 32.300000 0.000000 0.000000 0.000000 0.000000
50% 4330.500000 1.000000 0.118900 266.675000 11.050890 14.380000 707.000000 4440.000000 13645.000000 54.400000 1.000000 0.000000 0.000000 0.000000
75% 7109.750000 1.000000 0.135700 426.110000 11.407565 19.220000 737.000000 6119.989583 26433.250000 76.675000 2.000000 0.000000 0.000000 0.000000
max 9559.000000 1.000000 0.208600 922.420000 13.142166 29.950000 812.000000 17616.000000 394107.000000 106.400000 32.000000 5.000000 5.000000 1.000000
In [292]:
#credit policy for credit card
df_creditCard['credit.policy'].value_counts().plot(kind='pie')

print(df_creditCard.groupby('credit.policy').size())
print()
print("Percent who do not have a credit policy: " + str(496/(1835+496)))
credit.policy
0     242
1    1020
dtype: int64

Percent who do not have a credit policy: 0.21278421278421278
In [321]:
#fully paid for credit card
df_creditCard['not.fully.paid'].value_counts().plot(kind='pie')

print(df_creditCard.groupby('not.fully.paid').size())
print()
print("Percent who have fully paid: " + str(1116/(1116+146)))
not.fully.paid
0    1116
1     146
dtype: int64

Percent who have fully paid: 0.884310618066561
In [254]:
#DEBT CONSOLIDATION
df_debtConsol.describe()
Out[254]:
index credit.policy int.rate installment log.annual.inc dti fico days.with.cr.line revol.bal revol.util inq.last.6mths delinq.2yrs pub.rec not.fully.paid
count 3957.000000 3957.000000 3957.000000 3957.000000 3957.000000 3957.000000 3957.000000 3957.000000 3957.000000 3957.000000 3957.000000 3957.000000 3957.000000 3957.000000
mean 4835.371241 0.814506 0.126595 358.984390 10.912909 14.076462 703.871367 4533.037139 17146.710639 54.125504 1.461461 0.163255 0.070508 0.152388
std 2716.671892 0.388747 0.024769 198.309002 0.547477 6.433460 34.397778 2340.567954 24167.207708 26.715131 2.059245 0.561788 0.262854 0.359442
min 0.000000 0.000000 0.060000 23.210000 7.547502 0.000000 612.000000 180.041667 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 2481.000000 1.000000 0.111400 201.520000 10.571317 9.200000 677.000000 2925.000000 5494.000000 33.200000 0.000000 0.000000 0.000000 0.000000
50% 4933.000000 1.000000 0.128000 325.080000 10.903815 14.240000 697.000000 4114.041667 10868.000000 55.300000 1.000000 0.000000 0.000000 0.000000
75% 7141.000000 1.000000 0.142600 491.300000 11.238436 19.130000 727.000000 5639.958333 19469.000000 76.300000 2.000000 0.000000 0.000000 0.000000
max 9577.000000 1.000000 0.212100 940.140000 14.528354 29.960000 822.000000 16259.041670 290341.000000 119.000000 33.000000 13.000000 3.000000 1.000000
In [295]:
#credit policy for debt consolidation
df_debtConsol['credit.policy'].value_counts().plot(kind='pie')

print(df_debtConsol.groupby('credit.policy').size())
print()
print("Percent who do not have a credit policy: " + str(734/(3223+734)))
credit.policy
0     734
1    3223
dtype: int64

Percent who do not have a credit policy: 0.18549406115744252
In [320]:
#fully paid for debt consolidation
df_debtConsol['not.fully.paid'].value_counts().plot(kind='pie')

print(df_debtConsol.groupby('not.fully.paid').size())
print()
print("Percent who have fully paid: " + str(3354/(3354+603)))
not.fully.paid
0    3354
1     603
dtype: int64

Percent who have fully paid: 0.8476118271417741
In [255]:
#EDUCATION
df_education.describe()
Out[255]:
index credit.policy int.rate installment log.annual.inc dti fico days.with.cr.line revol.bal revol.util inq.last.6mths delinq.2yrs pub.rec not.fully.paid
count 343.000000 343.000000 343.000000 343.000000 343.000000 343.000000 343.000000 343.000000 343.000000 343.000000 343.000000 343.000000 343.000000 343.000000
mean 4881.064140 0.740525 0.119907 217.547201 10.549956 11.344927 708.282799 4008.595724 10828.897959 38.801166 1.854227 0.157434 0.043732 0.201166
std 2840.242283 0.438987 0.026551 168.505475 0.759202 6.939110 38.365188 2820.744277 23042.339207 29.866897 2.416265 0.450791 0.204796 0.401458
min 46.000000 0.000000 0.060000 15.670000 8.101678 0.000000 612.000000 299.958333 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 2435.500000 0.000000 0.097600 99.925000 10.085809 5.425000 677.000000 1814.979166 1143.500000 10.700000 0.000000 0.000000 0.000000 0.000000
50% 4519.000000 1.000000 0.121800 169.620000 10.596635 11.420000 707.000000 3389.958333 3745.000000 35.600000 1.000000 0.000000 0.000000 0.000000
75% 7772.500000 1.000000 0.135700 278.430000 11.130042 16.385000 737.000000 5310.041667 9243.500000 61.250000 2.000000 0.000000 0.000000 0.000000
max 9547.000000 1.000000 0.201100 861.880000 12.206073 29.740000 807.000000 14310.000000 226567.000000 102.700000 16.000000 3.000000 1.000000 1.000000
In [299]:
#credit policy for education
df_education['credit.policy'].value_counts().plot(kind='pie')

print(df_education.groupby('credit.policy').size())
print()
print("Percent who do not have a credit policy: " + str(89/(89+254)))
credit.policy
0     89
1    254
dtype: int64

Percent who do not have a credit policy: 0.2594752186588921
In [319]:
#fully paid for education
df_education['not.fully.paid'].value_counts().plot(kind='pie')

print(df_education.groupby('not.fully.paid').size())
print()
print("Percent who have fully paid: " + str(274/(274+69)))
not.fully.paid
0    274
1     69
dtype: int64

Percent who have fully paid: 0.7988338192419825
In [302]:
#HOME IMPROVEMENT
df_homeImprov.describe()
Out[302]:
index credit.policy int.rate installment log.annual.inc dti fico days.with.cr.line revol.bal revol.util inq.last.6mths delinq.2yrs pub.rec not.fully.paid
count 629.000000 629.000000 629.000000 629.00000 629.000000 629.000000 629.000000 629.000000 629.000000 629.000000 629.000000 629.000000 629.000000 629.000000
mean 4689.241653 0.813990 0.117507 337.07159 11.201979 10.197504 724.806041 5201.995562 17328.756757 34.274626 1.941176 0.136725 0.066773 0.170111
std 2759.812776 0.389424 0.026889 222.10867 0.592289 6.775213 43.561991 2529.042580 34249.738350 27.980053 2.357001 0.448354 0.279887 0.376030
min 8.000000 0.000000 0.060000 28.47000 9.392662 0.000000 632.000000 238.958333 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 2534.000000 1.000000 0.094500 163.43000 10.819858 4.300000 687.000000 3510.041667 2054.000000 8.800000 0.000000 0.000000 0.000000 0.000000
50% 4473.000000 1.000000 0.118300 282.40000 11.184421 9.660000 722.000000 4807.000000 6713.000000 30.000000 1.000000 0.000000 0.000000 0.000000
75% 6875.000000 1.000000 0.134800 491.37000 11.544812 15.100000 757.000000 6484.958333 17125.000000 52.300000 3.000000 0.000000 0.000000 0.000000
max 9576.000000 1.000000 0.216400 902.06000 13.487006 28.170000 827.000000 14761.041670 311616.000000 99.700000 15.000000 4.000000 3.000000 1.000000
In [304]:
#credit policy for home improvement
df_homeImprov['credit.policy'].value_counts().plot(kind='pie')

print(df_homeImprov.groupby('credit.policy').size())
print()
print("Percent who do not have a credit policy: " + str(117/(117+512)))
credit.policy
0    117
1    512
dtype: int64

Percent who do not have a credit policy: 0.18600953895071543
In [318]:
#fully paid for home improvement
df_homeImprov['not.fully.paid'].value_counts().plot(kind='pie')

print(df_homeImprov.groupby('not.fully.paid').size())
print()
print("Percent who have fully paid: " + str(522/(522+107)))
not.fully.paid
0    522
1    107
dtype: int64

Percent who have fully paid: 0.8298887122416534
In [257]:
#MAJOR PURCHASE
df_majorPurch.describe()
Out[257]:
index credit.policy int.rate installment log.annual.inc dti fico days.with.cr.line revol.bal revol.util inq.last.6mths delinq.2yrs pub.rec not.fully.paid
count 437.000000 437.000000 437.000000 437.000000 437.000000 437.000000 437.000000 437.000000 437.000000 437.000000 437.000000 437.000000 437.000000 437.000000
mean 5046.752860 0.848970 0.114171 243.483913 10.844895 10.159886 722.503432 4325.969108 7281.453089 32.457895 1.562929 0.173913 0.048055 0.112128
std 2572.951451 0.358488 0.027445 179.320592 0.644325 6.630960 41.098429 2665.433103 11189.595841 27.101806 2.181826 0.568239 0.295186 0.315886
min 15.000000 0.000000 0.070500 30.940000 8.699515 0.000000 642.000000 180.041667 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 2889.000000 1.000000 0.089400 124.880000 10.463103 4.480000 687.000000 2501.000000 906.000000 7.700000 0.000000 0.000000 0.000000 0.000000
50% 5360.000000 1.000000 0.115800 198.780000 10.828042 9.510000 722.000000 3815.041667 3309.000000 29.300000 1.000000 0.000000 0.000000 0.000000
75% 7106.000000 1.000000 0.134800 297.380000 11.167261 15.100000 757.000000 5491.000000 8950.000000 49.100000 2.000000 0.000000 0.000000 0.000000
max 9531.000000 1.000000 0.201700 898.550000 14.180154 26.150000 817.000000 15360.000000 111115.000000 99.600000 20.000000 4.000000 4.000000 1.000000
In [309]:
#credit policy for major purchase
df_majorPurch['credit.policy'].value_counts().plot(kind='pie')

print(df_majorPurch.groupby('credit.policy').size())
print()
print("Percent who do not have a credit policy: " + str(66/(66+371)))
credit.policy
0     66
1    371
dtype: int64

Percent who do not have a credit policy: 0.15102974828375287
In [317]:
#fully paid for major purchase
df_majorPurch['not.fully.paid'].value_counts().plot(kind='pie')

print(df_majorPurch.groupby('not.fully.paid').size())
print()
print("Percent who have fully paid: " + str(388/(388+49)))
not.fully.paid
0    388
1     49
dtype: int64

Percent who have fully paid: 0.8878718535469108
In [258]:
#SMALL BUSINESS
df_smallBus.describe()
Out[258]:
index credit.policy int.rate installment log.annual.inc dti fico days.with.cr.line revol.bal revol.util inq.last.6mths delinq.2yrs pub.rec not.fully.paid
count 619.000000 619.000000 619.000000 619.000000 619.000000 619.000000 619.000000 619.000000 6.190000e+02 619.000000 619.000000 619.000000 619.000000 619.000000
mean 4519.579968 0.799677 0.138087 433.826607 11.146216 10.793296 719.988691 4892.115778 2.758120e+04 40.070436 1.933764 0.155089 0.056543 0.277868
std 2805.495907 0.400566 0.031191 248.585838 0.632436 6.932821 37.768905 2636.567898 7.674016e+04 30.081828 2.481741 0.474446 0.238051 0.448310
min 13.000000 0.000000 0.071400 16.250000 8.987197 0.000000 642.000000 210.000000 0.000000e+00 0.000000 0.000000 0.000000 0.000000 0.000000
25% 2154.000000 1.000000 0.114600 226.390000 10.779997 5.025000 692.000000 3120.000000 2.400500e+03 12.000000 0.000000 0.000000 0.000000 0.000000
50% 4030.000000 1.000000 0.137900 394.360000 11.156251 10.390000 717.000000 4559.041667 7.815000e+03 36.200000 1.000000 0.000000 0.000000 0.000000
75% 7115.500000 1.000000 0.160000 628.215000 11.512925 16.130000 747.000000 6149.979166 2.486800e+04 65.200000 3.000000 0.000000 0.000000 1.000000
max 9554.000000 1.000000 0.212100 926.830000 13.331002 29.210000 822.000000 16350.000000 1.207359e+06 105.200000 27.000000 4.000000 2.000000 1.000000
In [313]:
#credit policy for small business
df_smallBus['credit.policy'].value_counts().plot(kind='pie')

print(df_smallBus.groupby('credit.policy').size())
print()
print("Percent who do not have a credit policy: " + str(124/(124+495)))
credit.policy
0    124
1    495
dtype: int64

Percent who do not have a credit policy: 0.20032310177705978
In [316]:
#fully paid for small business
df_smallBus['not.fully.paid'].value_counts().plot(kind='pie')

print(df_smallBus.groupby('not.fully.paid').size())
print()
print("Percent who have fully paid: " + str(447/(447+172)))
not.fully.paid
0    447
1    172
dtype: int64

Percent who have fully paid: 0.7221324717285945

By fully paid

In [12]:
#by fully paid
notFullyPaid =  df['not.fully.paid']==1
df_notFullyPaid = df[notFullyPaid]

fullyPaid = df['not.fully.paid']==0
df_fullyPaid = df[fullyPaid]

df_notFullyPaid = df_notFullyPaid.reset_index()
df_fullyPaid = df_fullyPaid.reset_index()
In [13]:
#NOT FULLY PAID
df_notFullyPaid.describe()
Out[13]:
index credit.policy int.rate installment log.annual.inc dti fico days.with.cr.line revol.bal revol.util inq.last.6mths delinq.2yrs pub.rec not.fully.paid
count 1533.000000 1533.000000 1533.000000 1533.000000 1533.000000 1533.000000 1533.000000 1533.000000 1.533000e+03 1533.000000 1533.000000 1533.000000 1533.000000 1533.0
mean 5305.644488 0.661448 0.132452 342.785114 10.885023 13.195838 697.828441 4393.541259 2.106629e+04 52.255075 2.330724 0.174821 0.091324 1.0
std 2998.101489 0.473372 0.025495 223.948527 0.666718 7.006769 33.756808 2431.785491 4.990569e+04 29.057906 2.933480 0.520562 0.292659 0.0
min 6.000000 0.000000 0.070500 15.910000 7.600902 0.000000 617.000000 180.041667 0.000000e+00 0.000000 0.000000 0.000000 0.000000 1.0
25% 2601.000000 0.000000 0.115400 168.640000 10.491274 7.830000 672.000000 2759.958333 3.323000e+03 29.900000 0.000000 0.000000 0.000000 1.0
50% 5620.000000 1.000000 0.131600 287.310000 10.878047 13.340000 692.000000 4050.000000 8.850000e+03 53.900000 1.000000 0.000000 0.000000 1.0
75% 8124.000000 1.000000 0.148200 491.300000 11.276633 18.830000 717.000000 5580.041667 2.061600e+04 77.000000 3.000000 0.000000 0.000000 1.0
max 9577.000000 1.000000 0.216400 926.830000 13.458836 29.960000 822.000000 15692.000000 1.207359e+06 106.500000 33.000000 4.000000 2.000000 1.0
In [344]:
#purpose for w/ not fully paid
df_notFullyPaid['purpose'].value_counts().plot(kind='pie')
df_notFullyPaid.groupby('purpose').size()
Out[344]:
purpose
all_other             387
credit_card           146
debt_consolidation    603
educational            69
home_improvement      107
major_purchase         49
small_business        172
dtype: int64
In [347]:
#credit policy for not fully paid
df_notFullyPaid['credit.policy'].value_counts().plot(kind='pie')

print(df_notFullyPaid.groupby('credit.policy').size())
print()
print("Percent who do not have a credit policy: " + str(519/(519+1014)))
credit.policy
0     519
1    1014
dtype: int64

Percent who do not have a credit policy: 0.3385518590998043
In [14]:
#FULLY PAID
df_fullyPaid.describe()
Out[14]:
index credit.policy int.rate installment log.annual.inc dti fico days.with.cr.line revol.bal revol.util inq.last.6mths delinq.2yrs pub.rec not.fully.paid
count 8045.000000 8045.000000 8045.000000 8045.000000 8045.000000 8045.000000 8045.000000 8045.000000 8045.000000 8045.000000 8045.000000 8045.000000 8045.000000 8045.0
mean 4689.956495 0.832318 0.120770 314.574123 10.941091 12.494413 713.326911 4592.632624 16122.724425 45.759608 1.433934 0.161591 0.056557 0.0
std 2707.422502 0.373607 0.026692 203.398688 0.604045 6.855020 38.223723 2508.035500 29637.645740 28.891285 1.999009 0.550974 0.255537 0.0
min 0.000000 0.000000 0.060000 15.670000 7.547502 0.000000 612.000000 178.958333 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
25% 2367.000000 1.000000 0.099600 162.250000 10.584056 7.120000 682.000000 2820.041667 3171.000000 21.400000 0.000000 0.000000 0.000000 0.0
50% 4649.000000 1.000000 0.121800 266.520000 10.933178 12.530000 707.000000 4140.041667 8535.000000 44.800000 1.000000 0.000000 0.000000 0.0
75% 6958.000000 1.000000 0.138700 423.950000 11.304720 17.800000 742.000000 5760.000000 17905.000000 69.800000 2.000000 0.000000 0.000000 0.0
max 9568.000000 1.000000 0.212100 940.140000 14.528354 29.950000 827.000000 17639.958330 952013.000000 119.000000 31.000000 13.000000 5.000000 0.0
In [349]:
#purpose for w/ fully paid
df_fullyPaid['purpose'].value_counts().plot(kind='pie')
df_fullyPaid.groupby('purpose').size()
Out[349]:
purpose
all_other             1944
credit_card           1116
debt_consolidation    3354
educational            274
home_improvement       522
major_purchase         388
small_business         447
dtype: int64
In [351]:
#credit policy for not fully paid
df_fullyPaid['credit.policy'].value_counts().plot(kind='pie')

print(df_fullyPaid.groupby('credit.policy').size())
print()
print("Percent who do not have a credit policy: " + str(1349/(1349+6696)))
credit.policy
0    1349
1    6696
dtype: int64

Percent who do not have a credit policy: 0.16768178993163455
In [89]:
#interest rate by installment
#not fully paid
x = df_notFullyPaid['int.rate']
y = df_notFullyPaid['installment']

plt.scatter(x, y, color='#3D77D6')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color='#32BDD1')

plt.title('Correlating interest rates and monthly installment payments for not fully paid loans')
plt.xlabel('Interest Rate')
plt.ylabel('Monthly Installment Payments')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#fully paid
x = df_fullyPaid['int.rate']
y = df_fullyPaid['installment']

df_fullyPaid.plot.scatter(x='int.rate', y='installment', color='#32BDD1')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color='#3D77D6')

plt.title('Correlating interest rates and monthly installment payments for fully paid loans')
plt.xlabel('Interest Rate')
plt.ylabel('Monthly Installment Payments')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: 2266.2695937316053
Intercept is: 42.61260625017172
Slope is: 2097.4351044799173
Intercept is: 61.266220671921104
In [356]:
#interest rate by annual income
df_notFullyPaid.plot(x='int.rate', y='log.annual.inc', style='o')
df_fullyPaid.plot(x='int.rate', y='log.annual.inc', style='rx')
Out[356]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a206ae4d0>
In [359]:
#interest rate by debt to income
#not fully paid
x = df_notFullyPaid['int.rate']
y = df_notFullyPaid['dti']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#fully paid
df_fullyPaid.plot(x='int.rate', y='dti', style='rx')
Slope is: 47.44573348201676
Intercept is: 6.911544049895588
Out[359]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1e8c6350>
In [90]:
#interest rate by fico
#not fully paid
x = df_notFullyPaid['int.rate']
y = df_notFullyPaid['fico']

plt.scatter(x, y, color='#3D77D6')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color='#32BDD1')

plt.title('Correlating interest rates and fico scores for not fully paid loans')
plt.xlabel('Interest Rate')
plt.ylabel('FICO Score')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#fully paid
x = df_fullyPaid['int.rate']
y = df_fullyPaid['fico']

df_fullyPaid.plot.scatter(x='int.rate', y='fico', color='#32BDD1')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color='#3D77D6')


plt.title('Correlating interest rates and fico scores for fully paid loans')
plt.xlabel('Interest Rate')
plt.ylabel('FICO Score')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: -790.1172975206548
Intercept is: 802.4812551725119
Slope is: -1039.6918854115188
Intercept is: 838.890829674152
In [85]:
#interest rate by days with credit
#not fully paid
df_notFullyPaid.plot(x='int.rate', y='days.with.cr.line', style='o')

#fully paid
x = df_fullyPaid['int.rate']
y = df_fullyPaid['days.with.cr.line']

df_fullyPaid.plot(x='int.rate', y='days.with.cr.line', style='rx')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color="blue")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: -12482.363231242729
Intercept is: 6100.131587723542
In [364]:
#interest rate by revolving balance
df_notFullyPaid.plot(x='int.rate', y='revol.bal', style='o')
df_fullyPaid.plot(x='int.rate', y='revol.bal', style='rx')
Out[364]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a21e95750>
In [83]:
#interest rate by utilization rate
#not fully paid
x = df_notFullyPaid['int.rate']
y = df_notFullyPaid['revol.util']

plt.scatter(x, y, color='#3D77D6')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color='#32BDD1')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#fully paid
x = df_fullyPaid['int.rate']
y = df_fullyPaid['revol.util']

df_fullyPaid.plot.scatter(x='int.rate', y='revol.util', color='#32BDD1')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color='#3D77D6')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: 375.7330290076137
Intercept is: 2.4883897411117673
Slope is: 522.7224119565362
Intercept is: -17.369742925322193
In [366]:
#interest rate by inquisition by last 6 months
df_notFullyPaid.plot(x='int.rate', y='inq.last.6mths', style='o')
df_fullyPaid.plot(x='int.rate', y='inq.last.6mths', style='rx')
Out[366]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1df06c90>
In [91]:
#installment by annual income
#not fully paid
x = df_notFullyPaid['installment']
y = df_notFullyPaid['log.annual.inc']

plt.scatter(x, y, color='#3D77D6')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color='#32BDD1')

plt.title('Correlating monthly installment payments and annual income for not fully paid loans')
plt.xlabel('Monthly Installment Payments')
plt.ylabel('Annual Income')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#fully paid
x = df_fullyPaid['installment']
y = df_fullyPaid['log.annual.inc']

df_fullyPaid.plot.scatter(x='installment', y='log.annual.inc', color='#32BDD1')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color='#3D77D6')

plt.title('Correlating monthly installment payments and annual income for fully paid loans')
plt.xlabel('Monthly Installment Payments')
plt.ylabel('Annual Income')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: 0.0015407402244870816
Intercept is: 10.35687981086574
Slope is: 0.001292120629599157
Intercept is: 10.534623431160133
In [368]:
#installment by debt to income
df_notFullyPaid.plot(x='installment', y='dti', style='o')
df_fullyPaid.plot(x='installment', y='dti', style='rx')
Out[368]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a21d19510>
In [369]:
#installment by fico
df_notFullyPaid.plot(x='installment', y='fico', style='o')
df_fullyPaid.plot(x='installment', y='fico', style='rx')
Out[369]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a203dc350>
In [370]:
#installment by days w/ credit
df_notFullyPaid.plot(x='installment', y='days.with.cr.line', style='o')
df_fullyPaid.plot(x='installment', y='days.with.cr.line', style='rx')
Out[370]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a21e367d0>
In [371]:
#installment by revolving balance
df_notFullyPaid.plot(x='installment', y='revol.bal', style='o')
df_fullyPaid.plot(x='installment', y='revol.bal', style='rx')
Out[371]:
<matplotlib.axes._subplots.AxesSubplot at 0x113ed7650>
In [372]:
#installment by revolving balance
df_notFullyPaid.plot(x='installment', y='revol.util', style='o')
df_fullyPaid.plot(x='installment', y='revol.util', style='rx')
Out[372]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1fb76cd0>
In [373]:
#installment by inquisitions in last 6 months
df_notFullyPaid.plot(x='installment', y='inq.last.6mths', style='o')
df_fullyPaid.plot(x='installment', y='inq.last.6mths', style='rx')
Out[373]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1dea6cd0>
In [374]:
#annual income by debt to income
df_notFullyPaid.plot(x='log.annual.inc', y='dti', style='o')
df_fullyPaid.plot(x='log.annual.inc', y='dti', style='rx')
Out[374]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a207ad990>
In [375]:
#annual income by debt to income
df_notFullyPaid.plot(x='log.annual.inc', y='fico', style='o')
df_fullyPaid.plot(x='log.annual.inc', y='fico', style='rx')
Out[375]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a21e23890>
In [92]:
#annual income by days w/ credit
#not fully paid
x = df_notFullyPaid['log.annual.inc']
y = df_notFullyPaid['days.with.cr.line']

plt.scatter(x, y, color='#3D77D6')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color='#32BDD1')

plt.title('Correlating annual income and days within credit line for not fully paid loans')
plt.xlabel('Annual Income')
plt.ylabel('Days Within Credit Line')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#fully paid
x = df_fullyPaid['log.annual.inc']
y = df_fullyPaid['days.with.cr.line']

df_fullyPaid.plot.scatter(x='log.annual.inc', y='days.with.cr.line', color='#32BDD1')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color='#3D77D6')

plt.title('Correlating annual income and days within credit line for fully paid loans')
plt.xlabel('Annual Income')
plt.ylabel('Days Within Credit Line')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: 1281.6067877438052
Intercept is: -9556.777621476578
Slope is: 1385.3215649570545
Intercept is: -10564.296883680961
In [96]:
#annual income by revolving balance
#not fully paid
x = df_notFullyPaid['log.annual.inc']
y = df_notFullyPaid['revol.bal']

plt.scatter(x, y, color='#3D77D6')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color='#32BDD1')

plt.title('Correlating annual income and revolving balance for not fully paid loans')
plt.xlabel('Annual Income')
plt.ylabel('Revolving Balance')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#fully paid
x = df_fullyPaid['log.annual.inc']
y = df_fullyPaid['revol.bal']

df_fullyPaid.plot.scatter(x='log.annual.inc', y='revol.bal', color='#32BDD1')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color='#3D77D6')

plt.title('Correlating annual income and revolving balance for fully paid loans')
plt.xlabel('Annual Income')
plt.ylabel('Revolving Balance')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: 30568.559707531356
Intercept is: -311673.1704758444
Slope is: 18230.18236409874
Intercept is: -183335.36241231166
In [95]:
#annual income by utilization rate
df_notFullyPaid.plot(x='log.annual.inc', y='revol.util', style='o')
df_fullyPaid.plot(x='log.annual.inc', y='revol.util', style='rx')
Out[95]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a22e67f50>
In [379]:
#annual income by inquisitions in last 6 months
df_notFullyPaid.plot(x='log.annual.inc', y='inq.last.6mths', style='o')
df_fullyPaid.plot(x='log.annual.inc', y='inq.last.6mths', style='rx')
Out[379]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a20945c50>
In [381]:
#debt to income by fico
#not fully paid
x = df_notFullyPaid['dti']
y = df_notFullyPaid['fico']

plt.scatter(x, y, color="blue")
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#fully paid
x = df_fullyPaid['dti']
y = df_fullyPaid['fico']

df_fullyPaid.plot(x='dti', y='fico', style='rx')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color="blue")

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: -0.8801907618012424
Intercept is: 709.4432958659132
Slope is: -1.385130506952868
Intercept is: 730.633303292624
In [382]:
#debt to income by days w/ credit
df_notFullyPaid.plot(x='dti', y='days.with.cr.line', style='o')
df_fullyPaid.plot(x='dti', y='days.with.cr.line', style='rx')
Out[382]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a21041fd0>
In [384]:
#debt to income by revolving balance
df_notFullyPaid.plot(x='dti', y='revol.bal', style='o')
df_fullyPaid.plot(x='dti', y='revol.bal', style='rx')
Out[384]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1df7a590>
In [386]:
#debt to income by utilization rate
df_notFullyPaid.plot(x='dti', y='revol.util', style='o')
df_fullyPaid.plot(x='dti', y='revol.util', style='rx')
Out[386]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a21423e50>
In [387]:
#debt to income by inquisitions in last 6 months
df_notFullyPaid.plot(x='dti', y='inq.last.6mths', style='o')
df_fullyPaid.plot(x='dti', y='inq.last.6mths', style='rx')
Out[387]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a200dec10>
In [97]:
#fico by days w/ credit
#not fully paid
x = df_notFullyPaid['fico']
y = df_notFullyPaid['days.with.cr.line']

plt.scatter(x, y, color='#3D77D6')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color='#32BDD1')

plt.title('Correlating FICO scores and days within credit line for not fully paid loans')
plt.xlabel('FICO Score')
plt.ylabel('Days Within Credit Line')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#fully paid
x = df_fullyPaid['fico']
y = df_fullyPaid['days.with.cr.line']

df_fullyPaid.plot.scatter(x='fico', y='days.with.cr.line', color='#32BDD1')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color='#3D77D6')

plt.title('Correlating FICO scores and days within credit line for fully paid loans')
plt.xlabel('FICO Score')
plt.ylabel('Days Within Credit Line')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: 17.361374665912866
Intercept is: -7721.719757174802
Slope is: 17.469955432956944
Intercept is: -7869.156722692853
In [390]:
#fico by revolving balance
df_notFullyPaid.plot(x='fico', y='revol.bal', style='o')
df_fullyPaid.plot(x='fico', y='revol.bal', style='rx')
Out[390]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1faf9d90>
In [98]:
#fico by utilization rate
#not fully paid
x = df_notFullyPaid['fico']
y = df_notFullyPaid['revol.util']

plt.scatter(x, y, color='#3D77D6')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color='#32BDD1')

plt.title('Correlating FICO scores and utilization rate for not fully paid loans')
plt.xlabel('FICO Score')
plt.ylabel('Utilization Rate')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#fully paid
x = df_fullyPaid['fico']
y = df_fullyPaid['revol.util']

df_fullyPaid.plot.scatter(x='fico', y='revol.util', color='#32BDD1')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color='#3D77D6')

plt.title('Correlating FICO scores and utilization rate for fully paid loans')
plt.xlabel('FICO Score')
plt.ylabel('Utilization Rate')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: -0.35771548253579727
Intercept is: 301.8791125034588
Slope is: -0.42177420151280726
Intercept is: 346.62249680976623
In [101]:
#fico by days w/ credit
#not fully paid
x = df_notFullyPaid['fico']
y = df_notFullyPaid['days.with.cr.line']

plt.scatter(x, y, color='#3D77D6')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color='#32BDD1')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#fully paid
x = df_fullyPaid['fico']
y = df_fullyPaid['days.with.cr.line']

df_fullyPaid.plot.scatter(x='fico', y='days.with.cr.line', color='#32BDD1')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color='#3D77D6')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: 17.361374665912866
Intercept is: -7721.719757174802
Slope is: 17.469955432956944
Intercept is: -7869.156722692853
In [102]:
#fico by inquisitions in last 6 months
#not fully paid
x = df_notFullyPaid['fico']
y = df_notFullyPaid['inq.last.6mths']

plt.scatter(x, y, color='#3D77D6')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.scatter(x, y)
plt.plot(x, m * x + b, color='#32BDD1')

plt.title('Correlating FICO scores and number of inquiries in the past 6 months for not fully paid loans')
plt.xlabel('FICO Score')
plt.ylabel('Number of Inquiries')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))

#fully paid
x = df_fullyPaid['fico']
y = df_fullyPaid['inq.last.6mths']

df_fullyPaid.plot.scatter(x='fico', y='inq.last.6mths', color='#32BDD1')
stats = linregress(x, y)

m = stats.slope
b = stats.intercept

plt.plot(x, m * x + b, color='#3D77D6')

plt.title('Correlating FICO scores and number of inquiries in the past 6 months for fully paid loans')
plt.xlabel('FICO Score')
plt.ylabel('Number of Inquiries')

plt.show()

print("Slope is: " + str(m))
print("Intercept is: " + str(b))
Slope is: -0.013564398296634651
Intercept is: 11.796346986424753
Slope is: -0.00907682098054344
Intercept is: 7.908674793456723
In [73]:
#revolving balance by utilization rate
df_notFullyPaid.plot.scatter(x='revol.bal', y='revol.util', color='#3D77D6')
df_fullyPaid.plot.scatter(x='revol.bal', y='revol.util', color='#32BDD1')
Out[73]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2123b110>
In [72]:
#revolving balance by inquisition in past 6 months
df_notFullyPaid.plot.scatter(x='revol.bal', y='inq.last.6mths', color='#3D77D6')
df_fullyPaid.plot.scatter(x='revol.bal', y='inq.last.6mths', color='#32BDD1')
Out[72]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a21045810>
In [70]:
#utilization rate by inquisition in past 6 months
df_notFullyPaid.plot.scatter(x='revol.util', y='inq.last.6mths', color='#3D77D6')
df_fullyPaid.plot.scatter(x='revol.util', y='inq.last.6mths', color='#32BDD1')
Out[70]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a21039bd0>
In [35]:
df_notFullyPaid['int.rate'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#3D77D6')
plt.title('Distribution of interest rates for not fully paid loans')
plt.xlabel('Interest Rate')
plt.grid(axis='y', alpha=0.75)
In [40]:
df_fullyPaid['int.rate'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#32BDD1')
plt.title('Distribution of interest rates for fully paid loans')
plt.xlabel('Interest Rate')
plt.grid(axis='y', alpha=0.75)
In [36]:
df_notFullyPaid['installment'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#3D77D6')
plt.title('Distribution of monthly installments for not fully paid loans')
plt.xlabel('Monthly Installment Amount')
plt.grid(axis='y', alpha=0.75)
In [38]:
df_fullyPaid['installment'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#32BDD1')
plt.title('Distribution of monthly installments for fully paid loans')
plt.xlabel('Monthly Installment Amount')
plt.grid(axis='y', alpha=0.75)
In [37]:
df_notFullyPaid['log.annual.inc'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#3D77D6')
plt.title('Distribution of annual income for not fully paid loans')
plt.xlabel('Annual Income')
plt.grid(axis='y', alpha=0.75)
In [39]:
df_fullyPaid['log.annual.inc'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#32BDD1')
plt.title('Distribution of annual income for fully paid loans')
plt.xlabel('Annual Income')
plt.grid(axis='y', alpha=0.75)
In [57]:
df_notFullyPaid['dti'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#3D77D6')
plt.title('Distribution of debt to income ratio for not fully paid loans')
plt.xlabel('Debt to Income Ratio')
plt.grid(axis='y', alpha=0.75)
In [56]:
df_fullyPaid['dti'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#32BDD1')
plt.title('Distribution of debt to income ratio for fully paid loans')
plt.xlabel('Debt to Income Ratio')
plt.grid(axis='y', alpha=0.75)
In [55]:
df_notFullyPaid['fico'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#3D77D6')
plt.title('Distribution of FICO scores for not fully paid loans')
plt.xlabel('FICO Score')
plt.grid(axis='y', alpha=0.75)
In [52]:
df_fullyPaid['fico'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#32BDD1')
plt.title('Distribution of FICO scores for fully paid loans')
plt.xlabel('FICO Score')
plt.grid(axis='y', alpha=0.75)
In [50]:
df_notFullyPaid['revol.bal'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#3D77D6')
plt.title('Distribution of revolving balances for not fully paid loans')
plt.xlabel('Revolving Balance')
plt.grid(axis='y', alpha=0.75)
In [49]:
df_fullyPaid['revol.bal'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#32BDD1')
plt.title('Distribution of revolving balances for fully paid loans')
plt.xlabel('Revolving Balance')
plt.grid(axis='y', alpha=0.75)
In [58]:
df_notFullyPaid['revol.util'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#3D77D6')
plt.title('Distribution of utilization rates for not fully paid loans')
plt.xlabel('Utilization Rate')
plt.grid(axis='y', alpha=0.75)
In [59]:
df_fullyPaid['revol.util'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#32BDD1')
plt.title('Distribution of utilization rates for fully paid loans')
plt.xlabel('Utilization Rate')
plt.grid(axis='y', alpha=0.75)
In [61]:
df_notFullyPaid['inq.last.6mths'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#3D77D6')
plt.title('Distribution of inquiries within the last six months for not fully paid loans')
plt.xlabel('Number of Inquiries')
plt.grid(axis='y', alpha=0.75)
In [62]:
df_fullyPaid['inq.last.6mths'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#32BDD1')
plt.title('Distribution of inquiries within the last six months for fully paid loans')
plt.xlabel('Number of Inquiries')
plt.grid(axis='y', alpha=0.75)
In [63]:
df_notFullyPaid['delinq.2yrs'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#3D77D6')
plt.title('Distribution of delinquencies in last two years for not fully paid loans')
plt.xlabel('Number of Delinquencies')
plt.grid(axis='y', alpha=0.75)
In [64]:
df_fullyPaid['delinq.2yrs'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#32BDD1')
plt.title('Distribution of delinquencies in last two years for fully paid loans')
plt.xlabel('Number of Delinquencies')
plt.grid(axis='y', alpha=0.75)
In [65]:
df_notFullyPaid['pub.rec'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#3D77D6')
plt.title('Distribution of derogatory public records for not fully paid loans')
plt.xlabel('Number of Derogatory Public Records')
plt.grid(axis='y', alpha=0.75)
In [66]:
df_fullyPaid['pub.rec'].plot.hist(grid=False, bins=20, rwidth=0.9, color='#32BDD1')
plt.title('Distribution of derogatory public records for fully paid loans')
plt.xlabel('Number of Derogatory Public Records')
plt.grid(axis='y', alpha=0.75)
In [ ]: